This is the same code as the original analysis, but I’m now modelling the raw amount of buckthorn (instead of the relative abundance)

library(plyr)
library(tidyverse)
library(nlme)
library(lme4)
library(glmmTMB)
library(RColorBrewer)
library(ggrepel)
library(gridExtra)
library(DHARMa)
library(emmeans)
library(predictmeans)
library(dplyr)

1 Create Scores

1.1 Read in & Join EI Data

gap.area <- read.csv("data/gap_area.csv") %>% select(-X, -Gap)
names(gap.area)[4] <-"Gap Area"
str(gap.area)
## 'data.frame':    48 obs. of  4 variables:
##  $ Location: chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category: chr  "EAB Gap" "EAB Gap" "EAB Gap" "EAB Gap" ...
##  $ ID      : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ Gap Area: num  90.4 139.6 114.1 166.4 59.8 ...
dist.edge <- read.csv("data/dist_edge.csv")
names(dist.edge)[4]<-"Edge Distance"
str(dist.edge)
## 'data.frame':    72 obs. of  4 variables:
##  $ Location     : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category     : chr  "EAB Gap" "Other Gap" "Non-Gap" "EAB Gap" ...
##  $ ID           : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Edge Distance: num  5.59 91.01 85.46 61.89 54.86 ...
frag.size <-read.csv("data/frag_size.csv")
names(frag.size)[2] <- "Fragment Size"
str(frag.size)
## 'data.frame':    6 obs. of  2 variables:
##  $ Location     : chr  "Westminster Ponds" "Meadowlily Woods" "Medway Valley" "Five Points Forest" ...
##  $ Fragment Size: int  199 324 230 174 11 37
shade.tol <- read.csv("data/shade_tol.csv") %>% select(-X, -Gap, -tot.BA)
names(shade.tol)[4] <- "Shade Tolerance"
str(shade.tol)
## 'data.frame':    24 obs. of  4 variables:
##  $ Location       : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category       : chr  "Non-Gap" "Non-Gap" "Non-Gap" "Non-Gap" ...
##  $ ID             : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ Shade Tolerance: num  4.44 4.52 4.18 4.53 3.33 ...
tree <- read.csv("data/percent_t.csv") %>% select(-X) 
names(tree)[4] <- "Percent Tree"
str(tree)
## 'data.frame':    72 obs. of  4 variables:
##  $ Location    : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ ID          : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Category    : chr  "EAB Gap" "Non-Gap" "Other Gap" "EAB Gap" ...
##  $ Percent Tree: int  50 70 60 90 50 40 80 110 120 30 ...
mgmt <- read.csv("data/management.csv")
str(mgmt)
## 'data.frame':    6 obs. of  3 variables:
##  $ Location            : chr  "Westminster Ponds" "Meadowlily Woods" "Medway Valley" "Five Points Forest" ...
##  $ Management          : chr  "Public" "Public" "Public" "Private" ...
##  $ Buckthorn_Management: int  1 1 1 1 0 0
EI <- full_join(dist.edge, gap.area, by = c("Location", "Category", "ID"))
EI2 <- full_join(EI, frag.size, by = c("Location"))
EI3 <- full_join(EI2, shade.tol, by = c("Location", "Category", "ID"))
EI4 <- full_join(EI3, tree, by = c("Location", "Category", "ID"))
EI5 <- full_join(EI4, mgmt, by = c("Location"))
EI5$Category <- as.factor(EI5$Category)
names(EI5) <- sub(" ", ".", names(EI5)) #Replace spaces with .
str(EI5)
## 'data.frame':    72 obs. of  10 variables:
##  $ Location            : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category            : Factor w/ 3 levels "EAB Gap","Non-Gap",..: 1 3 2 1 2 3 3 1 2 1 ...
##  $ ID                  : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Edge.Distance       : num  5.59 91.01 85.46 61.89 54.86 ...
##  $ Gap.Area            : num  90.4 59.8 NA 139.6 NA ...
##  $ Fragment.Size       : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ Shade.Tolerance     : num  NA NA 4.44 NA 4.52 ...
##  $ Percent.Tree        : int  50 60 70 90 50 40 120 80 110 30 ...
##  $ Management          : chr  "Private" "Private" "Private" "Private" ...
##  $ Buckthorn_Management: int  0 0 0 0 0 0 0 0 0 0 ...

1.2 Create Matrix for EI Analysis

Variables:

  • Edge Distance - plot specific: lower number = lower integrity
  • Fragment Size - site specific: lower number = lower integrity
  • Shade Tolerance - site specific: lower number = lower integrity?
  • Percent Tree - plot specific: lower number = lower integrity?
  • Management - site specific: lower number = lower integrity?
shade.tol.site <- EI5 %>% #summarize shade tolerance by site
  filter(Category == "Non-Gap") %>%
  group_by(Location) %>%
  summarize(Shade.Tolerance.Site = mean(Shade.Tolerance)) 

str(shade.tol.site)
## tibble [6 × 2] (S3: tbl_df/tbl/data.frame)
##  $ Location            : chr [1:6] "Code Farm" "Field Research Station" "Five Points Forest" "Meadowlily Woods" ...
##  $ Shade.Tolerance.Site: num [1:6] 4.42 3.33 4.18 4.02 4.82 ...
str(EI5)
## 'data.frame':    72 obs. of  10 variables:
##  $ Location            : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category            : Factor w/ 3 levels "EAB Gap","Non-Gap",..: 1 3 2 1 2 3 3 1 2 1 ...
##  $ ID                  : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Edge.Distance       : num  5.59 91.01 85.46 61.89 54.86 ...
##  $ Gap.Area            : num  90.4 59.8 NA 139.6 NA ...
##  $ Fragment.Size       : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ Shade.Tolerance     : num  NA NA 4.44 NA 4.52 ...
##  $ Percent.Tree        : int  50 60 70 90 50 40 120 80 110 30 ...
##  $ Management          : chr  "Private" "Private" "Private" "Private" ...
##  $ Buckthorn_Management: int  0 0 0 0 0 0 0 0 0 0 ...
EI6 <- full_join(EI5, shade.tol.site, by = c("Location")) %>% select(-Shade.Tolerance)

gap.area.site <- EI5 %>% #summarize gap area
  filter(Category != "Non-Gap") %>%
  group_by(Location) %>% 
  summarize(Gap.Area.Site = mean(Gap.Area)) 

EI7 <- full_join(EI6, gap.area.site, by = c("Location")) %>% select(-Gap.Area)

head(EI7)
##    Location  Category ID Edge.Distance Fragment.Size Percent.Tree Management
## 1 Code Farm   EAB Gap  1          5.59            37           50    Private
## 2 Code Farm Other Gap  1         91.01            37           60    Private
## 3 Code Farm   Non-Gap  1         85.46            37           70    Private
## 4 Code Farm   EAB Gap  2         61.89            37           90    Private
## 5 Code Farm   Non-Gap  2         54.86            37           50    Private
## 6 Code Farm Other Gap  2        120.79            37           40    Private
##   Buckthorn_Management Shade.Tolerance.Site Gap.Area.Site
## 1                    0             4.418994      136.4042
## 2                    0             4.418994      136.4042
## 3                    0             4.418994      136.4042
## 4                    0             4.418994      136.4042
## 5                    0             4.418994      136.4042
## 6                    0             4.418994      136.4042
#write.csv(EI7, file = "EI.csv")

1.3 Run without and without gap area

EI7 %>% select(Edge.Distance, Fragment.Size, Percent.Tree, Shade.Tolerance.Site) -> EI.data

prcomp(EI.data, scale. = TRUE) -> EI.pca

summary(EI.pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4
## Standard deviation     1.3486 1.0648 0.8749 0.53106
## Proportion of Variance 0.4547 0.2835 0.1914 0.07051
## Cumulative Proportion  0.4547 0.7381 0.9295 1.00000
biplot(EI.pca)

PC1 <- predict(EI.pca)[,1]
PC2 <- predict(EI.pca)[,2]

EI7 %>% select(Edge.Distance, Fragment.Size, Percent.Tree, Shade.Tolerance.Site, Gap.Area.Site) %>% 
  prcomp(scale. = TRUE) -> EI.pca2

summary(EI.pca2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5
## Standard deviation     1.4839 1.1522 0.9228 0.58280 0.52837
## Proportion of Variance 0.4404 0.2655 0.1703 0.06793 0.05584
## Cumulative Proportion  0.4404 0.7059 0.8762 0.94416 1.00000
biplot(EI.pca2)

PC1b <- predict(EI.pca2)[,1]
PC2b <- predict(EI.pca2)[,2]
rescale.U <- rep(EI.pca$sdev, each = length(EI.pca$sdev)) #get lengths

U.scale2 <- EI.pca$rotation * rescale.U #multiply lengths by sqrt SD

round(U.scale2^2,2) #variability in each variable for each PC
##                       PC1  PC2  PC3  PC4
## Edge.Distance        0.60 0.23 0.07 0.11
## Fragment.Size        0.83 0.00 0.03 0.14
## Percent.Tree         0.03 0.78 0.18 0.02
## Shade.Tolerance.Site 0.37 0.13 0.49 0.01

1.3.1 QQ plots for PCA

qqnorm(EI.data[,1]);qqline(EI.data[,1])

qqnorm(EI.data[,2]);qqline(EI.data[,2])

qqnorm(EI.data[,3]);qqline(EI.data[,3])

qqnorm(EI.data[,4]);qqline(EI.data[,4])

1.3.2 PCA BiPlot

#display.brewer.all(colorblindFriendly = TRUE)
mypalette <-brewer.pal(11,"BrBG")[c(1,3,7,5,9,11)]
#mypalette <- c("","","lightseagreen", "lightskyblue2", "dodgerblue",  "dodgerblue4")
image(1:9,1,as.matrix(1:9),col=mypalette,ylab="",xaxt="n",yaxt="n",bty="n")

U <- data.frame(EI.pca$rotation)
colnames(U) <- colnames(EI.pca$rotation)
rownames(U) <- rownames(EI.pca$rotation)
U$descriptors <- rownames(U)
F.1 <- data.frame(EI.pca$x) # The book calls this matrix F but I use F.1 because in R, F is shorthand for FALSE
colnames(F.1) <- colnames(EI.pca$x)
rownames(F.1) <- rownames(EI.pca$x)
str(U)
## 'data.frame':    4 obs. of  5 variables:
##  $ PC1        : num  0.574 0.674 0.118 0.449
##  $ PC2        : num  -0.44703 0.00827 0.82678 0.34137
##  $ PC3        : num  0.292 0.198 0.486 -0.799
##  $ PC4        : num  -0.621 0.711 -0.257 -0.207
##  $ descriptors: chr  "Edge.Distance" "Fragment.Size" "Percent.Tree" "Shade.Tolerance.Site"
levels(U$descriptors) <- c("Distance from Forest Edge", "Forest Fragment Size", "Tree Regeneration", "Successional Stage")
U$descriptors <- as.factor(U$descriptors)
str(U)
## 'data.frame':    4 obs. of  5 variables:
##  $ PC1        : num  0.574 0.674 0.118 0.449
##  $ PC2        : num  -0.44703 0.00827 0.82678 0.34137
##  $ PC3        : num  0.292 0.198 0.486 -0.799
##  $ PC4        : num  -0.621 0.711 -0.257 -0.207
##  $ descriptors: Factor w/ 4 levels "Edge.Distance",..: 1 2 3 4
F.1$Location<- EI7$Location 
F.1$Category <- EI7$Category 


F.1$Location = factor(F.1$Location, levels=c("Field Research Station", "Code Farm", "Westminster Ponds", "Five Points Forest", "Medway Valley", "Meadowlily Woods"))
F.1$Category = factor(F.1$Category, levels=c("EAB Gap", "Other Gap", "Non-Gap"))

#Change Names
F.1$Location <- revalue(F.1$Location, c("Field Research Station"="Private 1"))
F.1$Location <- revalue(F.1$Location, c("Code Farm"="Private 2"))
F.1$Location <- revalue(F.1$Location, c("Westminster Ponds"="Public 1"))
F.1$Location <- revalue(F.1$Location, c("Five Points Forest"="Private 3"))
F.1$Location <- revalue(F.1$Location, c("Medway Valley"="Public 2"))
F.1$Location <- revalue(F.1$Location, c("Meadowlily Woods"="Public 3"))
levels((F.1$Location))
## [1] "Private 1" "Private 2" "Public 1"  "Private 3" "Public 2"  "Public 3"

Plot the objects

levels(U$descriptors) <- c("Distance from Edge", "Fragment Size", "Tree Regeneration", 
                           "Successional Stage")


biplot1 <- ggplot(F.1, aes(x = PC1, y =PC2)) + 
  geom_point(aes(shape = Category, fill = Location),col = "black", size = 2, pch=21, alpha = 0.9) +
  theme_classic() +
  coord_fixed() +
  labs(x = 'Principle Component 1', y = "Principle Component 2") +
  scale_fill_manual(values = mypalette) +
  geom_segment(data = U, aes(xend = PC1*3, yend = PC2*3,x = 0, y = 0), col = "black", alpha = 0.7, arrow = arrow(length = unit(0.35, "cm"))) +
  geom_label_repel(data = U, aes(x = PC1*3, y = PC2*3, label = descriptors),
                   col = "black", nudge_y = -0.35, 
                   segment.colour = NA, size = 3, alpha = 0.7) +
  theme(legend.position="bottom", legend.box = "horizontal", plot.margin=grid::unit(c(0,0,0,0), "mm"))
biplot1

#ggsave('figures/fig.biplotBrBu.jpeg',biplot1, units = 'cm', width = 14, height =12)

2 First Round of Modelling

2.1 Prepare Data

shrubs.f <- read.csv("data/shrubs_final.csv") %>% select(-"X")
shrubs.EI <- full_join(shrubs.f, EI7, by = c("Location", "Category", "ID")) 
## Warning: Column `Category` joining character vector and factor, coercing into
## character vector
shrubs.EI$Category <- as.factor(shrubs.EI$Category)
names(shrubs.EI) <- sub(" ", ".", names(shrubs.EI)) #Replace spaces with .
str(shrubs.EI)
## 'data.frame':    72 obs. of  15 variables:
##  $ Location            : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category            : Factor w/ 3 levels "EAB Gap","Non-Gap",..: 1 1 1 1 2 2 2 2 3 3 ...
##  $ ID                  : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ richness            : int  2 3 2 3 3 2 3 2 3 1 ...
##  $ H                   : num  0.5 0.965 0.693 1.099 0.736 ...
##  $ percentNN           : num  0 0 0 0 12.5 ...
##  $ percentB            : num  0 0 0 0 12.5 ...
##  $ Buckthorn           : int  0 0 0 0 10 0 0 0 10 0 ...
##  $ Edge.Distance       : num  5.59 61.89 59.79 14.57 85.46 ...
##  $ Fragment.Size       : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ Percent.Tree        : int  50 90 80 30 70 50 110 100 60 40 ...
##  $ Management          : chr  "Private" "Private" "Private" "Private" ...
##  $ Buckthorn_Management: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Shade.Tolerance.Site: num  4.42 4.42 4.42 4.42 4.42 ...
##  $ Gap.Area.Site       : num  136 136 136 136 136 ...

2.1.1 Add values from PCA

shrubs.EI$PC1 <- PC1
shrubs.EI$PC2 <- PC2

2.1.2 Standardize EI

hist(shrubs.EI$PC1)

shrubs.EI %>% summarize(mean = mean(PC1) %>% round(1), sd = sd(PC1) %>% round(1)) #mean already was 0 (centered)
##   mean  sd
## 1    0 1.3
shrubs.EI$PC1.s <- scale(shrubs.EI$PC1, center = TRUE, scale = TRUE)  #standardize
shrubs.EI %>% summarize(mean = mean(PC1.s) %>% round(1), sd = sd(PC1.s)) 
##   mean sd
## 1    0  1
head(shrubs.EI)
##    Location Category ID richness         H percentNN percentB Buckthorn
## 1 Code Farm  EAB Gap  1        2 0.5004024       0.0      0.0         0
## 2 Code Farm  EAB Gap  2        3 0.9649629       0.0      0.0         0
## 3 Code Farm  EAB Gap  3        2 0.6931472       0.0      0.0         0
## 4 Code Farm  EAB Gap  4        3 1.0986123       0.0      0.0         0
## 5 Code Farm  Non-Gap  1        3 0.7356219      12.5     12.5        10
## 6 Code Farm  Non-Gap  2        2 0.5004024       0.0      0.0         0
##   Edge.Distance Fragment.Size Percent.Tree Management Buckthorn_Management
## 1          5.59            37           50    Private                    0
## 2         61.89            37           90    Private                    0
## 3         59.79            37           80    Private                    0
## 4         14.57            37           30    Private                    0
## 5         85.46            37           70    Private                    0
## 6         54.86            37           50    Private                    0
##   Shade.Tolerance.Site Gap.Area.Site        PC1         PC2      PC1.s
## 1             4.418994      136.4042 -1.3830871  0.12935587 -1.0255878
## 2             4.418994      136.4042 -0.8279143 -0.02767019 -0.6139157
## 3             4.418994      136.4042 -0.8262459  0.24628193 -0.6126785
## 4             4.418994      136.4042 -0.8987823  0.85326388 -0.6664657
## 5             4.418994      136.4042 -1.0833052 -0.10406499 -0.8032933
## 6             4.418994      136.4042 -0.7175934 -0.66407255 -0.5321104

2.2 Compare Private & Public

lm.mgmt <- lm(data = shrubs.EI, PC1 ~ Management)
summary(lm.mgmt)
## 
## Call:
## lm(formula = PC1 ~ Management, data = shrubs.EI)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.48175 -1.08831  0.07374  0.81699  1.81340 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.9127     0.1656  -5.510 5.59e-07 ***
## ManagementPublic   1.8255     0.2343   7.793 4.40e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9939 on 70 degrees of freedom
## Multiple R-squared:  0.4645, Adjusted R-squared:  0.4569 
## F-statistic: 60.73 on 1 and 70 DF,  p-value: 4.397e-11
anova(lm.mgmt)
## Analysis of Variance Table
## 
## Response: PC1
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Management  1 59.983  59.983  60.728 4.397e-11 ***
## Residuals  70 69.142   0.988                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(lm.mgmt)
##                      2.5 %     97.5 %
## (Intercept)      -1.243108 -0.5823824
## ManagementPublic  1.358287  2.2926933
ggplot(shrubs.EI, aes(x = Management, y = PC1)) +
  geom_boxplot() +
    geom_jitter(alpha=0.6, aes(col = Location)) +
  theme_classic()

2.3 Summarize EI

EI.location <- shrubs.EI %>% group_by(Location) %>%
  summarize(EI = mean(PC1) %>% round(2))
arrange(EI.location, EI)
## # A tibble: 6 x 2
##   Location                  EI
##   <chr>                  <dbl>
## 1 Field Research Station -2.25
## 2 Code Farm              -0.92
## 3 Westminster Ponds      -0.01
## 4 Five Points Forest      0.43
## 5 Medway Valley           1.17
## 6 Meadowlily Woods        1.57
EI.cat <- shrubs.EI %>% group_by(Category) %>%
  summarize(EI = mean(PC1) %>% round(2))
arrange(EI.cat, EI)
## # A tibble: 3 x 2
##   Category     EI
##   <fct>     <dbl>
## 1 EAB Gap   -0.03
## 2 Other Gap  0   
## 3 Non-Gap    0.03

2.3.1 Check distribution

hist(shrubs.f$Buckthorn) #left skewed

(sum(shrubs.EI$Buckthorn > 0) / nrow(shrubs.EI))*100 # 56% zeros
## [1] 55.55556
#shrubs.EI$Category <- relevel(shrubs.EI$Category, ref = "Non-Gap") #re-level to set non-gap as reference condition - removed

Is it more interesting to consider EAB gaps or non-gaps as the reference category? Decided on EAB gaps

2.4 Apply a Poission Generalized MM & check for overdispersion

shrubs.B1 <- glmer(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), family = poisson, data = shrubs.EI)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
E1 <- resid(shrubs.B1, type = "pearson")
N <- nrow(shrubs.EI)
p <- length(fixef(shrubs.B1)) + 1
sum(E1^2) / (N - p)
## [1] 14.59909

Yea, that’s way over 1 (super overdispersed)

2.5 Apply a negative binomial GLMM & check for overdispersion

shrubs.B2 <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), family = "nbinom2", data = shrubs.EI)

E3 <- resid(shrubs.B2, type = "pearson")
N <- nrow(shrubs.EI)
p <- length(fixef(shrubs.B2)$cond) + 1 + 1
sum(E3^2) / (N - p)
## [1] 0.5120524

Now likely underdispersed, but better than before

2.5.1 Model Selection

Select ecological integrity (PC1 / PC2) with and without gap area

#PC1 without gap area
shrubsfit.Ba <- glmmTMB(Buckthorn ~ Category * PC1 + (1 | Location), family = "nbinom2", data = shrubs.EI)
#PC1 with gap area
shrubsfit.Bb <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), family = "nbinom2", data = shrubs.EI)
#PC2 without gap area
shrubsfit.Bc <- glmmTMB(Buckthorn ~ Category * PC2 + (1 | Location), family = "nbinom2", data = shrubs.EI)
#PC2 with gap area
shrubsfit.Bd <- glmmTMB(Buckthorn ~ Category * PC2 + Gap.Area.Site + (1 | Location), family = "nbinom2", data = shrubs.EI)
#gap area in PCA
shrubsfit.Be <- glmmTMB(Buckthorn ~ Category * PC1b + (1 | Location), family = "nbinom2", data = shrubs.EI)
AIC(shrubsfit.Ba, shrubsfit.Bb, shrubsfit.Bc, shrubsfit.Bd, shrubsfit.Be)
##              df      AIC
## shrubsfit.Ba  8 455.3764
## shrubsfit.Bb  9 453.7808
## shrubsfit.Bc  8 453.2132
## shrubsfit.Bd  9 455.1510
## shrubsfit.Be  8 458.7470

PC1 with gap area (shrubsfit.Bb) is best

shrubsfit.Bb <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), family = "nbinom2", data = shrubs.EI)
shrubsfit.Bb.no.inter <- glmmTMB(Buckthorn ~ Category + PC1 + Gap.Area.Site + (1 | Location), family = "nbinom2", data = shrubs.EI)
shrubsfit.null <- glmmTMB(Buckthorn ~ 1 + (1 | Location), family = "nbinom2", data = shrubs.EI)
anova(shrubsfit.Bb, shrubsfit.Bb.no.inter)
## Data: shrubs.EI
## Models:
## shrubsfit.Bb.no.inter: Buckthorn ~ Category + PC1 + Gap.Area.Site + (1 | Location), zi=~0, disp=~1
## shrubsfit.Bb: Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), zi=~0, disp=~1
##                       Df    AIC    BIC  logLik deviance  Chisq Chi Df
## shrubsfit.Bb.no.inter  7 455.30 471.23 -220.65   441.30              
## shrubsfit.Bb           9 453.78 474.27 -217.89   435.78 5.5155      2
##                       Pr(>Chisq)  
## shrubsfit.Bb.no.inter             
## shrubsfit.Bb             0.06344 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(shrubsfit.Bb, shrubsfit.Bb.no.inter, shrubsfit.null)
##                       df      AIC
## shrubsfit.Bb           9 453.7808
## shrubsfit.Bb.no.inter  7 455.2962
## shrubsfit.null         3 463.8500

p-value for interaction is now 0.06 (still marginally significant)

summary(shrubs.B2)
##  Family: nbinom2  ( log )
## Formula:          Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location)
## Data: shrubs.EI
## 
##      AIC      BIC   logLik deviance df.resid 
##    453.8    474.3   -217.9    435.8       63 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev. 
##  Location (Intercept) 1.08e-08 0.0001039
## Number of obs: 72, groups:  Location, 6
## 
## Overdispersion parameter for nbinom2 family (): 0.34 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            9.07861    2.48643   3.651 0.000261 ***
## CategoryNon-Gap       -2.66823    0.58783  -4.539 5.65e-06 ***
## CategoryOther Gap     -0.56264    0.50306  -1.118 0.263385    
## PC1                   -0.73065    0.28456  -2.568 0.010238 *  
## Gap.Area.Site         -0.04269    0.01725  -2.474 0.013348 *  
## CategoryNon-Gap:PC1   -0.49142    0.46269  -1.062 0.288202    
## CategoryOther Gap:PC1  0.59645    0.35743   1.669 0.095178 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect of gap area now significant

2.5.2 Model Validation

2.5.2.1 Residuals vs fitted values X

There are two types of fitted values for a GLM - the “link” predictions are estimates of \(\eta\) and the response predictions are estimates of \(\mu\).

shrubs.EI$res <- residuals(shrubsfit.Bb, type = "pearson")
shrubs.EI$fit <- predict(shrubsfit.Bb, type = "response")

ggplot(shrubs.EI,aes(x = fit, y = res)) + 
  geom_point() + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(shrubs.EI, aes(x = PC1, y = res)) + 
  geom_point() + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

2.5.2.2 DHARMa Plots

simulationOutput1 <- simulateResiduals(fittedModel = shrubsfit.Bb, n = 1000)
plotSimulatedResiduals(simulationOutput = simulationOutput1)
## plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function

par(mfrow = c(1,3))

2.6 Test Gap Size

names(gap.area) <- sub(" ", ".", names(gap.area)) #Replace spaces with .
gap.test <- lme(Gap.Area~Category,random=~1|Location,data=gap.area)

anova(gap.test)
##             numDF denDF  F-value p-value
## (Intercept)     1    41 96.89140  <.0001
## Category        1    41  1.28749  0.2631
summary(gap.test)
## Linear mixed-effects model fit by REML
##  Data: gap.area 
##        AIC      BIC    logLik
##   566.6578 573.9724 -279.3289
## 
## Random effects:
##  Formula: ~1 | Location
##         (Intercept) Residual
## StdDev: 0.004330794 97.93859
## 
## Fixed effects: Gap.Area ~ Category 
##                      Value Std.Error DF   t-value p-value
## (Intercept)       155.1877  19.99163 41  7.762633  0.0000
## CategoryOther Gap -32.0801  28.27244 41 -1.134678  0.2631
##  Correlation: 
##                   (Intr)
## CategoryOther Gap -0.707
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.4588304 -0.6725202 -0.2158717  0.3635209  2.2730709 
## 
## Number of Observations: 48
## Number of Groups: 6
gap.area %>% group_by(Category) %>% summarize(gap.area = mean(Gap.Area))
## # A tibble: 2 x 2
##   Category  gap.area
##   <chr>        <dbl>
## 1 EAB Gap       155.
## 2 Other Gap     123.
intervals(gap.test,  which = "fixed")
## Approximate 95% confidence intervals
## 
##  Fixed effects:
##                       lower     est.     upper
## (Intercept)       114.81378 155.1877 195.56161
## CategoryOther Gap -89.17744 -32.0801  25.01724
## attr(,"label")
## [1] "Fixed effects:"
ggplot(gap.area, aes(x = Category, y = Gap.Area)) +
  geom_boxplot() +
    geom_jitter(alpha=0.6, aes(col = Location)) +
  theme_classic()

2.6.1 Test Assumptions

Here are the residuals and fitted values:

gap.area$mfit <- fitted(gap.test, level = 0) 

gap.area$mresid <- residuals(gap.test, type = "normalized")

2.6.1.1 Linearity and Equal Variance

ggplot(gap.area, aes(x = mfit, y = mresid)) + 
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 122.95
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 32.241
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1039.4
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 122.95
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 32.241
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 1039.4

2.6.1.2 Normality

ggplot(gap.area, aes(sample = mresid)) + geom_qq() + geom_qq_line()

2.6.1.3 Cook’s Distances

gap.area$cookd<-CookD(gap.test, plot=FALSE)

ggplot(gap.area, aes(seq_along(cookd), cookd))+
  geom_bar(stat="identity", position="identity")+ 
  xlab("Obs. Number")+
  ylab("Cook's distance")+
  theme_classic()

3 Zero-Inflated Model

Terms were considered important and remained in the model if removal caused an increase in AIC of 2 or more (ΔAIC ≥ 2).

All AIC calculations below are based on:

\[ ΔAIC = AIC_{Remove} - AIC_{Include}\]

In some cases, removing the term decreases AIC (ΔAIC < 0)

shrubs.0infl <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location) + Management, ziformula = ~., family = "nbinom2", data = shrubs.EI)

shrubs.hurdle <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location) + Management, ziformula = ~., family = "truncated_nbinom2", data = shrubs.EI)

#summary(shrubs.0infl)
#summary(shrubs.hurdle)

3.1 Step 1: Compare to Negative Binomial

AIC(shrubs.0infl, shrubs.hurdle, shrubs.B2) # Better than neg binomial 
##               df      AIC
## shrubs.0infl  19 427.5488
## shrubs.hurdle 19 427.6976
## shrubs.B2      9 453.7808

Both the zero-inflated and zero-altered model are better than the original negative binomial model

We’re going to use zero-altered because it’s safe to assume that if buckthorn was present, it was recorded

3.2 Step 2: Fixed Effects

shrubs.hurdle <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~ Category * PC1 + Gap.Area.Site + Management + (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

3.3 Step 3: Simplify Zero-Inflation

3.3.1 Management

shrubs.hurdle1 <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site +   (1 | Location), 
                        ziformula = ~  Category * PC1 + Gap.Area.Site + (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle, shrubs.hurdle1)
##                df      AIC
## shrubs.hurdle  18 428.0959
## shrubs.hurdle1 17 426.9657

Drop management, reduces AIC

426.9657-428.0959
## [1] -1.1302

3.3.2 Interaction

shrubs.hurdle1a <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~  Category + PC1 + Gap.Area.Site + (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

shrubs.hurdle1b <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~ Category * PC1 + Gap.Area.Site + (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)


AIC(shrubs.hurdle1a, shrubs.hurdle1b)
##                 df      AIC
## shrubs.hurdle1a 15 424.8045
## shrubs.hurdle1b 17 426.9657
424.8045-426.9657
## [1] -2.1612

1a - drop interaction, reduced AIC

3.3.3 Gap Size

shrubs.hurdle2c <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~  Category + PC1 + (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle1a, shrubs.hurdle2c)
##                 df      AIC
## shrubs.hurdle1a 15 424.8045
## shrubs.hurdle2c 14 423.5389
423.5389-424.8045
## [1] -1.2656

2c - drop gap area, reducec AIC

3.3.4 PC1

shrubs.hurdle3a <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)


AIC(shrubs.hurdle2c, shrubs.hurdle3a)
##                 df      AIC
## shrubs.hurdle2c 14 423.5389
## shrubs.hurdle3a 13 422.0300
422.0300-423.5389
## [1] -1.5089

3b - drop PC1, reduced AIC

3.3.5 Category

shrubs.hurdle4 <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~ 1 +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle3a, shrubs.hurdle4)
##                 df      AIC
## shrubs.hurdle3a 13 422.0300
## shrubs.hurdle4  11 426.9292
426.9292-422.0300
## [1] 4.8992

Keep Category, dropping increases AIC > 2

3.4 Step 4: Simplify Negative-Binomial

shrubs.hurdle5c <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + Management + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

3.4.1 Management

shrubs.hurdle5d <- glmmTMB(Buckthorn ~ Category * PC1 + Gap.Area.Site + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle5c, shrubs.hurdle5d)
##                 df      AIC
## shrubs.hurdle5c 14 421.6317
## shrubs.hurdle5d 13 422.0300
422.0300-421.6317
## [1] 0.3983

Drop management

3.4.2 Gap Size

shrubs.hurdle5 <- glmmTMB(Buckthorn ~ Category * PC1 + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle5d, shrubs.hurdle5)
##                 df      AIC
## shrubs.hurdle5d 13 422.0300
## shrubs.hurdle5  12 423.2072
423.2072-422.0300
## [1] 1.1772

Drop gap size

3.4.3 Interaction

shrubs.hurdle6 <- glmmTMB(Buckthorn ~ Category + PC1 + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI)

AIC(shrubs.hurdle5, shrubs.hurdle6)
##                df      AIC
## shrubs.hurdle5 12 423.2072
## shrubs.hurdle6 10 427.9651
427.9651-423.2072
## [1] 4.7579

Keep interaction

3.5 Model Validatation (through simulation)

sim.m5 <- simulate(shrubs.hurdle5, nsim = 999) %>% t() %>% as.data.frame() %>% 
  gather(observation, sim_value)

sim.m5$Location <- rep(shrubs.EI$Location, each = 999)
sim.m5$obs.num <- as.numeric(str_extract(sim.m5$observation, "[:digit:]+"))


str(sim.m5)
## 'data.frame':    71928 obs. of  4 variables:
##  $ observation: chr  "V1" "V1" "V1" "V1" ...
##  $ sim_value  : num  43 35 255 38 0 26 65 40 70 73 ...
##  $ Location   : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ obs.num    : num  1 1 1 1 1 1 1 1 1 1 ...
ggplot() +
  geom_boxplot(data = sim.m5, aes(x = reorder(observation, obs.num), 
                                  y = sim_value + 1, 
                                  col = Location), alpha = 0.5) +
  scale_y_log10() +
  geom_point(data = shrubs.EI, aes(x = 1:72, y = Buckthorn + 1), col = "black") +
  theme_classic()

A lot of uncertainty but the real observations are usually in line the middle 50% of the simulations.

A bit more buckthorn than the model predicts at Westminster Ponds but I think that is consistent with previous knowledge.

3.6 Plot Model Predictions

non.min <- min(subset(shrubs.EI$PC1, shrubs.EI$Category == "Non-Gap"))
non.max<- max(subset(shrubs.EI$PC1, shrubs.EI$Category == "Non-Gap"))
EAB.min <- min(subset(shrubs.EI$PC1, shrubs.EI$Category == "EAB Gap"))
EAB.max <- max(subset(shrubs.EI$PC1, shrubs.EI$Category == "EAB Gap"))
other.min <- min(subset(shrubs.EI$PC1, shrubs.EI$Category == "Other Gap"))
other.max <- max(subset(shrubs.EI$PC1, shrubs.EI$Category == "Other Gap"))


pred <- data.frame(PC1 = c(seq(EAB.min, EAB.max, length.out = 100), 
                               seq(other.min, other.max, length.out = 100), 
                                   seq(non.min, non.max, length.out = 100)),
                   Category = c(rep("EAB Gap", 100), rep("Other Gap", 100), rep("Non-Gap", 100)),
                   Gap.Area.Site = rep(mean(shrubs.EI$Gap.Area.Site),300), # Added gap area
                   Management = c(rep("Private", 150), rep("Public", 150)), 
                   Location = rep(NA, 300)) # This NA gives general predictions instead of location specific predictions

pred$fit <- predict(shrubs.hurdle5, newdata = pred, se.fit = TRUE, type = "link")$fit
pred$se <- predict(shrubs.hurdle5, newdata = pred, se.fit = TRUE, type = "link")$se.fit
str(pred)
## 'data.frame':    300 obs. of  7 variables:
##  $ PC1          : num  -2.28 -2.23 -2.19 -2.14 -2.1 ...
##  $ Category     : chr  "EAB Gap" "EAB Gap" "EAB Gap" "EAB Gap" ...
##  $ Gap.Area.Site: num  139 139 139 139 139 ...
##  $ Management   : chr  "Private" "Private" "Private" "Private" ...
##  $ Location     : logi  NA NA NA NA NA NA ...
##  $ fit          : num  4.29 4.27 4.26 4.24 4.23 ...
##  $ se           : num  0.404 0.398 0.392 0.386 0.38 ...
pred.upr <- pred$fit + 1.96 * pred$se
pred.lwr <- pred$fit - 1.96 * pred$se

pred$upr <- ifelse(exp(pred.upr) < 100, exp(pred.upr), 100)
pred$lwr <- ifelse(exp(pred.lwr) > 0, exp(pred.lwr), 0)

pred$mean <- exp(pred$fit)
mypalette2 = c(brewer.pal(12, "Paired")[c(12,2,4)])
str(pred)
## 'data.frame':    300 obs. of  10 variables:
##  $ PC1          : num  -2.28 -2.23 -2.19 -2.14 -2.1 ...
##  $ Category     : chr  "EAB Gap" "EAB Gap" "EAB Gap" "EAB Gap" ...
##  $ Gap.Area.Site: num  139 139 139 139 139 ...
##  $ Management   : chr  "Private" "Private" "Private" "Private" ...
##  $ Location     : logi  NA NA NA NA NA NA ...
##  $ fit          : num  4.29 4.27 4.26 4.24 4.23 ...
##  $ se           : num  0.404 0.398 0.392 0.386 0.38 ...
##  $ upr          : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ lwr          : num  33 32.9 32.8 32.7 32.6 ...
##  $ mean         : num  72.9 71.8 70.8 69.7 68.6 ...
shrubs.EI$Category = factor(shrubs.EI$Category, levels=c("EAB Gap", "Other Gap", "Non-Gap"))
pred$Category = factor(pred$Category, levels=c("EAB Gap", "Other Gap", "Non-Gap"))


fig.EI <- ggplot(pred, aes(x = PC1, y = mean)) +
    geom_ribbon(aes(x = PC1, ymin = lwr, ymax = upr, fill = Category), alpha = 0.15) +
      geom_point(data = shrubs.EI, aes(x = PC1, y = Buckthorn, col = Category), alpha = 0.6) + 
  geom_line(aes(col = Category), size = 1, alpha = 0.8) +
  labs(x = "Ecological Integrity (index)",
       y = "European buckthorn (% cover)",
       col = "Category") +
  scale_y_continuous(limits = c(0, 100)) +
    scale_colour_manual(values = mypalette2) +
  scale_fill_manual(values = mypalette2) +
    theme_classic() +
 theme(legend.position = "none")

fig.EI

#ggsave('figures/fig.EI.jpeg',fig.EI , units = 'cm', width = 14, height = 10)
fig.EI2 <- ggplot(pred, aes(x = PC1, y = mean)) +
  geom_ribbon(aes(x = PC1, ymin = lwr, ymax = upr, fill = Category), alpha = 0.15) +
  geom_point(data = shrubs.EI, aes(x = PC1, y = Buckthorn, col = Category), alpha = 0.6) + 
  geom_line(aes(col = Category), size = 1, alpha = 0.8) +
  labs(x = "Ecological Integrity (index)",
       y = "European buckthorn (% cover)",
       col = "Category") +
  scale_y_continuous(limits = c(0, 100)) +
  scale_colour_manual(values = mypalette2,  name = "Gap Category", labels = c("Emerald Ash Borer Gap", "Other Canopy Gap", "Non-Gap")) +
  scale_fill_manual(values = mypalette2,  name = "Gap Category", labels = c("Emerald Ash Borer Gap", "Other Canopy Gap", "Non-Gap")) +
  theme_classic() +
  theme(legend.position = "bottom")

fig.EI2

#ggsave('figures/fig.EI2.jpeg',fig.EI2 , units = 'cm', width = 15, height = 12)

Check those high % buckthorn points - where are they coming from?

ggplot(pred, aes(x = PC1, y = mean)) +
  geom_ribbon(aes(x = PC1, ymin = lwr, ymax = upr, fill = Category), alpha = 0.15) +
  geom_point(data = shrubs.EI, aes(x = PC1, y = Buckthorn, col = Category, shape = Location), alpha = 0.6) + 
  geom_line(aes(col = Category), size = 1, alpha = 0.8) +
  labs(x = "Ecological Integrity",
       y = "Buckthorn (%)",
       col = "Category") +
  scale_colour_manual(values = mypalette2) +
  scale_fill_manual(values = mypalette2) +
  theme_classic() 

# theme(legend.position = "none")

shrubs.EI.w <- shrubs.EI %>% filter(Location == "Westminster Ponds")
shrubs.EI.nw <- shrubs.EI %>% filter(Location != "Westminster Ponds")

ggplot(pred, aes(x = PC1, y = mean)) +
  geom_ribbon(aes(x = PC1, ymin = lwr, ymax = upr, fill = Category), alpha = 0.15) +
  geom_point(data = shrubs.EI.nw, aes(x = PC1, y = Buckthorn, col = Category), alpha = 0.6) + 
  geom_point(data = shrubs.EI.w, aes(x = PC1, y = Buckthorn, col = Category), alpha = 0.6, shape = 17) + 
  geom_line(aes(col = Category), size = 1, alpha = 0.8) +
  labs(x = "Ecological Integrity",
       y = "Buckthorn (%)",
       col = "Category") +
  scale_colour_manual(values = mypalette2) +
  scale_fill_manual(values = mypalette2) +
  theme_classic() 

# theme(legend.position = "none")

The 90% are all coming from Westminster Ponds

3.7 Model Summary

summary(shrubs.hurdle5)
##  Family: truncated_nbinom2  ( log )
## Formula:          Buckthorn ~ Category * PC1 + (1 | Location)
## Zero inflation:             ~Category + (1 | Location)
## Data: shrubs.EI
## 
##      AIC      BIC   logLik deviance df.resid 
##    423.2    450.5   -199.6    399.2       60 
## 
## Random effects:
## 
## Conditional model:
##  Groups   Name        Variance Std.Dev.
##  Location (Intercept) 0.103    0.3209  
## Number of obs: 72, groups:  Location, 6
## 
## Zero-inflation model:
##  Groups   Name        Variance Std.Dev.
##  Location (Intercept) 1.218    1.104   
## Number of obs: 72, groups:  Location, 6
## 
## Overdispersion parameter for truncated_nbinom2 family (): 2.93 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             3.5180     0.2103  16.728  < 2e-16 ***
## CategoryNon-Gap        -1.4585     0.3674  -3.970 7.19e-05 ***
## CategoryOther Gap      -0.6244     0.2338  -2.671  0.00756 ** 
## PC1                    -0.3386     0.1579  -2.145  0.03198 *  
## CategoryNon-Gap:PC1     0.1869     0.3073   0.608  0.54301    
## CategoryOther Gap:PC1   0.5806     0.1908   3.044  0.00234 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Zero-inflation model:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)       -8.883e-01  6.716e-01  -1.323   0.1859  
## CategoryNon-Gap    1.751e+00  7.193e-01   2.435   0.0149 *
## CategoryOther Gap  3.277e-06  6.794e-01   0.000   1.0000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(fixef(shrubs.hurdle5)$cond) # Intercept expected cover when present, others are multiplicative changes in cover conditional on presence
##           (Intercept)       CategoryNon-Gap     CategoryOther Gap 
##            33.7154190             0.2325890             0.5355936 
##                   PC1   CategoryNon-Gap:PC1 CategoryOther Gap:PC1 
##             0.7128030             1.2055000             1.7871821
exp(fixef(shrubs.hurdle5)$zi) # Intercept is odds of zero, others are odds ratios
##       (Intercept)   CategoryNon-Gap CategoryOther Gap 
##         0.4113465         5.7618230         1.0000033
exp(confint(shrubs.hurdle5))
##                                        2.5 %     97.5 %   Estimate
## cond.(Intercept)                  22.3264815 50.9139553 33.7154190
## cond.CategoryNon-Gap               0.1132062  0.4778683  0.2325890
## cond.CategoryOther Gap             0.3387368  0.8468538  0.5355936
## cond.PC1                           0.5231179  0.9712689  0.7128030
## cond.CategoryNon-Gap:PC1           0.6601346  2.2014151  1.2055000
## cond.CategoryOther Gap:PC1         1.2296463  2.5975110  1.7871821
## Location.cond.Std.Dev.(Intercept)  1.1056405  2.7882306  1.3783666
## zi.(Intercept)                     0.1102974  1.5340883  0.4113465
## zi.CategoryNon-Gap                 1.4069059 23.5968903  5.7618230
## zi.CategoryOther Gap               0.2640439  3.7872731  1.0000033
## Location.zi.Std.Dev.(Intercept)    1.5832860 14.1600376  3.0149229

I think I’ve figured out how to correctly interpret the odds ratios and intercept scores

#Intercept
#Non-Gaps
33.715419*0.1132062 %>% round(2)#lower
## [1] 3.708696
33.715419*0.2325890 %>% round(2) #estimate
## [1] 7.754546
33.715419*0.4778683 %>% round(2)#upper
## [1] 16.1834
#Other-Gaps
33.7154190*0.3387368 %>% round(2)#lower
## [1] 11.46324
33.7154190*0.5355936 %>% round(2)#estimate
## [1] 18.20633
33.7154190*0.8468538 %>% round(2)#upper
## [1] 28.65811
#PC1
#Non-Gaps
0.7128030*0.6601346 %>% round(2)#lower
## [1] 0.47045
0.7128030*1.2055000 %>% round(2)#estimate
## [1] 0.8624916
0.7128030*2.2014151 %>% round(2)#upper
## [1] 1.568167
#Other-Gaps
0.7128030*1.2296463 %>% round(2)#lower 
## [1] 0.8767477
0.7128030*1.7871821 %>% round(2)#estimate
## [1] 1.275917
0.7128030*2.5975110 %>% round(2)#upper
## [1] 1.853288

3.7.1 emtrends

emtrends extracts the slopes which are expressed relative to EAB gaps - I then use exp to “unlink” the predictions.

These values are really just PC1 * (Category:PC1)

I think the levels in here get mixed up somehow because the other-gap and non-gap categories come out mixed up - I’m going to use the values from above but I know where they’re coming from onw.

emtrends(shrubs.hurdle5, pairwise ~ Category, var = "PC1")
## $emtrends
##  Category  PC1.trend    SE df lower.CL upper.CL
##  EAB Gap      -0.339 0.158 60   -0.654  -0.0228
##  Other Gap    -0.152 0.310 60   -0.771   0.4681
##  Non-Gap       0.242 0.177 60   -0.111   0.5953
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast            estimate    SE df t.ratio p.value
##  EAB Gap - Other Gap   -0.187 0.307 60 -0.608  0.8162 
##  EAB Gap - Non-Gap     -0.581 0.191 60 -3.044  0.0096 
##  Other Gap - Non-Gap   -0.394 0.303 60 -1.298  0.4017 
## 
## P value adjustment: tukey method for comparing a family of 3 estimates
exp(-0.339) %>% round(3)
## [1] 0.712
exp(-0.152) %>% round(3)
## [1] 0.859
exp(0.242) %>% round(3)
## [1] 1.274

3.8 Summary Statistics (for table)

str(shrubs.EI)
## 'data.frame':    72 obs. of  20 variables:
##  $ Location            : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category            : Factor w/ 3 levels "EAB Gap","Other Gap",..: 1 1 1 1 3 3 3 3 2 2 ...
##  $ ID                  : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ richness            : int  2 3 2 3 3 2 3 2 3 1 ...
##  $ H                   : num  0.5 0.965 0.693 1.099 0.736 ...
##  $ percentNN           : num  0 0 0 0 12.5 ...
##  $ percentB            : num  0 0 0 0 12.5 ...
##  $ Buckthorn           : int  0 0 0 0 10 0 0 0 10 0 ...
##  $ Edge.Distance       : num  5.59 61.89 59.79 14.57 85.46 ...
##  $ Fragment.Size       : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ Percent.Tree        : int  50 90 80 30 70 50 110 100 60 40 ...
##  $ Management          : chr  "Private" "Private" "Private" "Private" ...
##  $ Buckthorn_Management: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Shade.Tolerance.Site: num  4.42 4.42 4.42 4.42 4.42 ...
##  $ Gap.Area.Site       : num  136 136 136 136 136 ...
##  $ PC1                 : num  -1.383 -0.828 -0.826 -0.899 -1.083 ...
##  $ PC2                 : num  0.1294 -0.0277 0.2463 0.8533 -0.1041 ...
##  $ PC1.s               : num [1:72, 1] -1.026 -0.614 -0.613 -0.666 -0.803 ...
##   ..- attr(*, "scaled:center")= num -1.89e-16
##   ..- attr(*, "scaled:scale")= num 1.35
##  $ res                 : num  -0.582 -0.581 -0.581 -0.581 0.272 ...
##  $ fit                 : num  71.26 47.5 47.44 50.03 6.76 ...
shrubs.summary <- shrubs.EI %>% select(Location, Category, ID, Buckthorn, Edge.Distance, Fragment.Size, Percent.Tree, Shade.Tolerance.Site, PC1) %>% 
  group_by(Location, Category) %>% 
  summarize(Buckthorn = mean(Buckthorn), Edge.Distance = mean(Edge.Distance), Fragment.Size = mean(Fragment.Size), 
            Percent.Tree = mean(Percent.Tree), Shade.Tolerance.Site = mean(Shade.Tolerance.Site), PC1 = mean(PC1)) %>% 
  mutate_if(is.numeric, round, digits = 1)
## `mutate_if()` ignored the following grouping variables:
## Column `Location`
shrubs.summary
## # A tibble: 18 x 8
## # Groups:   Location [6]
##    Location Category Buckthorn Edge.Distance Fragment.Size Percent.Tree
##    <chr>    <fct>        <dbl>         <dbl>         <dbl>        <dbl>
##  1 Code Fa… EAB Gap        0            35.5            37         62.5
##  2 Code Fa… Other G…       5           102.             37         60  
##  3 Code Fa… Non-Gap        2.5          75.9            37         82.5
##  4 Field R… EAB Gap       55            62              11         92.5
##  5 Field R… Other G…       7.5          60.3            11         42.5
##  6 Field R… Non-Gap        5            57.4            11         52.5
##  7 Five Po… EAB Gap       30           192.            174         82.5
##  8 Five Po… Other G…       7.5         187.            174         60  
##  9 Five Po… Non-Gap        2.5         201.            174         60  
## 10 Meadowl… EAB Gap        7.5         257.            324         62.5
## 11 Meadowl… Other G…       2.5         256.            324         85  
## 12 Meadowl… Non-Gap        0           248             324         67.5
## 13 Medway … EAB Gap       17.5         110.            230        100  
## 14 Medway … Other G…      27.5         195.            230         70  
## 15 Medway … Non-Gap        0           136.            230         70  
## 16 Westmin… EAB Gap       47.5          55.8           199        115  
## 17 Westmin… Other G…      37.5          90.3           199        115  
## 18 Westmin… Non-Gap       10            80.7           199        105  
## # … with 2 more variables: Shade.Tolerance.Site <dbl>, PC1 <dbl>
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Field Research Station"="Private 1"))
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Code Farm"="Private 2"))
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Westminster Ponds"="Public 1"))
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Five Points Forest"="Private 3"))
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Medway Valley"="Public 2"))
shrubs.summary$Location <- revalue(shrubs.summary$Location, c("Meadowlily Woods"="Public 3"))

#write.csv(shrubs.summary, file = "shrubs_summary.csv")
gap.area.summary <- EI5 %>% #summarize gap area
  filter(Category != "Non-Gap") %>%
  group_by(Location, Category) %>% 
  summarize(Gap.Area.Site = mean(Gap.Area) %>% round(1)) 

gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Field Research Station"="Private 1"))
gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Code Farm"="Private 2"))
gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Westminster Ponds"="Public 1"))
gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Five Points Forest"="Private 3"))
gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Medway Valley"="Public 2"))
gap.area.summary$Location <- revalue(gap.area.summary$Location, c("Meadowlily Woods"="Public 3"))

#write.csv(gap.area.summary, file = "gaps_summary.csv")

3.9 Run Model Without Western FRS

As per reviewer comments - influence of this station

Remove FRS from data

shrubs.EI.wo <- shrubs.EI %>% filter(Location != "Field Research Station") %>% 
  select(Location, Category, ID, Buckthorn, Management, Gap.Area.Site, PC1) 
str(shrubs.EI.wo)
## 'data.frame':    60 obs. of  7 variables:
##  $ Location     : chr  "Code Farm" "Code Farm" "Code Farm" "Code Farm" ...
##  $ Category     : Factor w/ 3 levels "EAB Gap","Other Gap",..: 1 1 1 1 3 3 3 3 2 2 ...
##  $ ID           : int  1 2 3 4 1 2 3 4 1 2 ...
##  $ Buckthorn    : int  0 0 0 0 10 0 0 0 10 0 ...
##  $ Management   : chr  "Private" "Private" "Private" "Private" ...
##  $ Gap.Area.Site: num  136 136 136 136 136 ...
##  $ PC1          : num  -1.383 -0.828 -0.826 -0.899 -1.083 ...

Run model without FRS data

shrubs.hurdle5.wo <- glmmTMB(Buckthorn ~ Category * PC1 + (1 | Location), 
                        ziformula = ~ Category +  (1 | Location), family = "truncated_nbinom2", data = shrubs.EI.wo)

3.9.1 Compare Model Outputs

exp(fixef(shrubs.hurdle5)$cond) # Intercept expected cover when present, others are multiplicative changes in cover conditional on presence
##           (Intercept)       CategoryNon-Gap     CategoryOther Gap 
##            33.7154190             0.2325890             0.5355936 
##                   PC1   CategoryNon-Gap:PC1 CategoryOther Gap:PC1 
##             0.7128030             1.2055000             1.7871821
exp(fixef(shrubs.hurdle5.wo)$cond)
##           (Intercept)     CategoryOther Gap       CategoryNon-Gap 
##            49.2315112             0.3845417             0.1975805 
##                   PC1 CategoryOther Gap:PC1   CategoryNon-Gap:PC1 
##             0.4407283             3.7069936             2.2689714

Remove FRS:

  • Even more buckthorn in average EI EAB gaps
  • Greater discrepancy between EAB gaps & non-gaps
  • Slope of PC1 in EAB gaps increases

Therefore, greater EAB & EI effect when we remove FRS

#Non-gaps
33.7154190 * 0.2325890
## [1] 7.841836
49.2315112 * 0.1975805
## [1] 9.727187
#Other gaps
33.7154190 * 0.5355936
## [1] 18.05776
49.2315112 *  0.3845417
## [1] 18.93157
exp(fixef(shrubs.hurdle5)$zi) # Intercept is odds of zero, others are odds ratios
##       (Intercept)   CategoryNon-Gap CategoryOther Gap 
##         0.4113465         5.7618230         1.0000033
exp(fixef(shrubs.hurdle5.wo)$zi) # Intercept is odds of zero, others are odds ratios
##       (Intercept) CategoryOther Gap   CategoryNon-Gap 
##         0.5730672         0.7672057         4.9923899

Remove FRS:

  • Still equally likely to be present in EAB & other gaps
  • Odds of presense decrease from 1 to 0.76
  • Odds of presence in non-gaps also decrease (5.7 to 5.0 times less likely)
exp(confint(shrubs.hurdle5))
##                                        2.5 %     97.5 %   Estimate
## cond.(Intercept)                  22.3264815 50.9139553 33.7154190
## cond.CategoryNon-Gap               0.1132062  0.4778683  0.2325890
## cond.CategoryOther Gap             0.3387368  0.8468538  0.5355936
## cond.PC1                           0.5231179  0.9712689  0.7128030
## cond.CategoryNon-Gap:PC1           0.6601346  2.2014151  1.2055000
## cond.CategoryOther Gap:PC1         1.2296463  2.5975110  1.7871821
## Location.cond.Std.Dev.(Intercept)  1.1056405  2.7882306  1.3783666
## zi.(Intercept)                     0.1102974  1.5340883  0.4113465
## zi.CategoryNon-Gap                 1.4069059 23.5968903  5.7618230
## zi.CategoryOther Gap               0.2640439  3.7872731  1.0000033
## Location.zi.Std.Dev.(Intercept)    1.5832860 14.1600376  3.0149229
exp(confint(shrubs.hurdle5.wo))
##                                         2.5 %     97.5 %   Estimate
## cond.(Intercept)                  28.52113323 84.9805538 49.2315112
## cond.CategoryOther Gap             0.18017215  0.8207279  0.3845417
## cond.CategoryNon-Gap               0.08133583  0.4799613  0.1975805
## cond.PC1                           0.24618336  0.7890112  0.4407283
## cond.CategoryOther Gap:PC1         1.45022580  9.4756291  3.7069936
## cond.CategoryNon-Gap:PC1           0.53113419  9.6929014  2.2689714
## Location.cond.Std.Dev.(Intercept)  1.00000000        Inf  1.0000605
## zi.(Intercept)                     0.13614110  2.4122474  0.5730672
## zi.CategoryOther Gap               0.18368121  3.2044900  0.7672057
## zi.CategoryNon-Gap                 1.06280095 23.4511994  4.9923899
## Location.zi.Std.Dev.(Intercept)    1.55107946 19.6450909  3.1370843

Remove FRS:

  • In general, confidence intervals become wider (more uncertainty)

3.9.2 Visualize Model Predictions

pred.wo <- data.frame(PC1 = c(seq(EAB.min, EAB.max, length.out = 100), 
                               seq(other.min, other.max, length.out = 100), 
                                   seq(non.min, non.max, length.out = 100)),
                   Category = c(rep("EAB Gap", 100), rep("Other Gap", 100), rep("Non-Gap", 100)),
                   Gap.Area.Site = rep(mean(shrubs.EI$Gap.Area.Site),300), # Added gap area
                   Management = c(rep("Private", 150), rep("Public", 150)), 
                   Location = rep(NA, 300)) # This NA gives general predictions instead of location specific predictions

pred.wo$fit <- predict(shrubs.hurdle5.wo, newdata = pred, se.fit = TRUE, type = "link")$fit
pred.wo$se <- predict(shrubs.hurdle5.wo, newdata = pred, se.fit = TRUE, type = "link")$se.fit
str(pred.wo)
## 'data.frame':    300 obs. of  7 variables:
##  $ PC1          : num  -2.28 -2.23 -2.19 -2.14 -2.1 ...
##  $ Category     : chr  "EAB Gap" "EAB Gap" "EAB Gap" "EAB Gap" ...
##  $ Gap.Area.Site: num  139 139 139 139 139 ...
##  $ Management   : chr  "Private" "Private" "Private" "Private" ...
##  $ Location     : logi  NA NA NA NA NA NA ...
##  $ fit          : num  5.76 5.73 5.69 5.65 5.62 ...
##  $ se           : num  0.886 0.873 0.86 0.847 0.834 ...
pred.upr <- pred.wo$fit + 1.96 * pred$se
pred.lwr <- pred.wo$fit - 1.96 * pred$se

pred.wo$upr <- ifelse(exp(pred.upr) < 100, exp(pred.upr), 100)
pred.wo$lwr <- ifelse(exp(pred.lwr) > 0, exp(pred.lwr), 0)

pred.wo$mean <- exp(pred.wo$fit)
shrubs.EI.wo$Category = factor(shrubs.EI.wo$Category, levels=c("EAB Gap", "Other Gap", "Non-Gap"))
pred.wo$Category = factor(pred.wo$Category, levels=c("EAB Gap", "Other Gap", "Non-Gap"))

fig.EI.wo <- ggplot(pred.wo, aes(x = PC1, y = mean)) +
    geom_ribbon(aes(x = PC1, ymin = lwr, ymax = upr, fill = Category), alpha = 0.15) +
      geom_point(data = shrubs.EI.wo, aes(x = PC1, y = Buckthorn, col = Category), alpha = 0.6) + 
  geom_line(aes(col = Category), size = 1, alpha = 0.8) +
  labs(x = "Ecological Integrity",
       y = "Buckthorn (%)",
       col = "Category") +
  scale_y_continuous(limits = c(0, 100)) +
    scale_colour_manual(values = mypalette2) +
  scale_fill_manual(values = mypalette2) +
    theme_classic() +
 theme(legend.position = "none")

fig.EI

fig.EI.wo
## Warning: Removed 32 row(s) containing missing values (geom_path).

#ggsave('figures/fig.EI.noFRS.jpeg',fig.EI.wo , units = 'cm', width = 14, height = 10)

4 Reproducibility

Sys.time()
## [1] "2020-06-02 15:55:59 PDT"
git2r::repository()
## Local:    master /Users/JenBaron/Documents/UWO/UWO 4th Year/Publication/Analysis/EAB_Manuscript
## Remote:   master @ origin (https://github.com/JenBaron/EAB_Manuscript.git)
## Head:     [e96291d] 2020-05-22: Fixing model selection code, calculating AIC
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] predictmeans_1.0.4 emmeans_1.4.6      DHARMa_0.3.0       gridExtra_2.3     
##  [5] ggrepel_0.8.2      RColorBrewer_1.1-2 glmmTMB_1.0.1      lme4_1.1-23       
##  [9] Matrix_1.2-18      nlme_3.1-147       forcats_0.5.0      stringr_1.4.0     
## [13] dplyr_0.8.5        purrr_0.3.4        readr_1.3.1        tidyr_1.0.2       
## [17] tibble_3.0.1       ggplot2_3.3.0      tidyverse_1.3.0    plyr_1.8.6        
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4           colorspace_1.4-1      ellipsis_0.3.0       
##  [4] estimability_1.3      fs_1.4.1              rstudioapi_0.11      
##  [7] proxy_0.4-24          farver_2.0.3          fansi_0.4.1          
## [10] mvtnorm_1.1-0         lubridate_1.7.8       xml2_1.3.2           
## [13] codetools_0.2-16      splines_4.0.0         doParallel_1.0.15    
## [16] spaMM_3.2.0           knitr_1.28            jsonlite_1.6.1       
## [19] nloptr_1.2.2.1        pbkrtest_0.4-8.6      broom_0.5.6          
## [22] dbplyr_1.4.3          shiny_1.4.0.2         compiler_4.0.0       
## [25] httr_1.4.1            backports_1.1.6       assertthat_0.2.1     
## [28] fastmap_1.0.1         cli_2.0.2             later_1.0.0          
## [31] ROI.plugin.glpk_0.3-0 htmltools_0.4.0       tools_4.0.0          
## [34] coda_0.19-3           gtable_0.3.0          glue_1.4.0           
## [37] Rcpp_1.0.4.6          slam_0.1-47           cellranger_1.1.0     
## [40] vctrs_0.2.4           iterators_1.0.12      xfun_0.13            
## [43] rvest_0.3.5           mime_0.9              lifecycle_0.2.0      
## [46] statmod_1.4.34        ROI_0.3-3             MASS_7.3-51.6        
## [49] scales_1.1.0          hms_0.5.3             promises_1.1.0       
## [52] TMB_1.7.16            yaml_2.2.1            pbapply_1.4-2        
## [55] stringi_1.4.6         gap_1.2.2             foreach_1.5.0        
## [58] boot_1.3-25           qgam_1.3.2            rlang_0.4.5          
## [61] pkgconfig_2.0.3       evaluate_0.14         lattice_0.20-41      
## [64] labeling_0.3          tidyselect_1.0.0      magrittr_1.5         
## [67] R6_2.4.1              generics_0.0.2        DBI_1.1.0            
## [70] pillar_1.4.3          haven_2.2.0           withr_2.2.0          
## [73] mgcv_1.8-31           modelr_0.1.6          crayon_1.3.4         
## [76] Rglpk_0.6-4           utf8_1.1.4            rmarkdown_2.1        
## [79] grid_4.0.0            readxl_1.3.1          git2r_0.26.1         
## [82] reprex_0.3.0          digest_0.6.25         xtable_1.8-4         
## [85] httpuv_1.5.2          numDeriv_2016.8-1.1   munsell_0.5.0        
## [88] registry_0.5-1